# -*- coding:utf-8 -*-

from scrapy import Request
from scrapy.spiders import Spider
from scrapyExercise.items import SeabornFileItem

class SeabornSpider(Spider):
    """Crawl the seaborn example gallery and yield downloadable source files.

    Flow: index page -> each example's detail page -> a ``SeabornFileItem``
    whose ``file_urls`` points at the example's downloadable source file
    (consumed by Scrapy's FilesPipeline).
    """

    name = "seaborn"

    def start_requests(self):
        """Seed the crawl with the seaborn examples index page."""
        url = "http://seaborn.pydata.org/examples/index.html"
        yield Request(url)

    def parse(self, response, **kwargs):
        """Parse the gallery index and follow each example's detail page."""
        urls = response.xpath('//div[@class="figure align-center"]/a/@href').extract()
        for u in urls:
            # u is a relative URL; resolve it against the response's URL.
            yield Request(response.urljoin(u), callback=self.parse_file)

    def parse_file(self, response, **kwargs):
        """Parse one example page and yield an item with the file URL.

        Fixes the original implementation, which passed a SelectorList
        (from the bogus XPath ``//url``) straight to ``response.urljoin`` —
        a guaranteed TypeError. Extract the download link's href as a
        string instead, and skip pages that have no download link.
        """
        # Sphinx-generated example pages expose the source file via an
        # <a class="reference download internal"> link — TODO confirm the
        # exact class against the live page markup.
        href = response.xpath(
            '//a[contains(@class, "download")]/@href'
        ).extract_first()
        if href is None:
            # No download link on this page; nothing to yield.
            return
        item = SeabornFileItem()
        # FilesPipeline expects a list of absolute URLs under 'file_urls'.
        item['file_urls'] = [response.urljoin(href)]
        yield item

